library("FRESA.CAD")
library(readxl)
library(igraph)
library(umap)
library(tsne)
library(entropy)
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)
Data Source https://quantdev.ssri.psu.edu/tutorials/intro-basic-exploratory-factor-analysis
“For this example, we use data from the web that are collected and distributed at https://openpsychometrics.org/_rawdata/. The data were obtained from 19,719 participants (rows) who provided answers to the Big Five Personality Test, constructed with items from the International Personality Item Pool. Data columns include gender, age, race, native language, country, and answers to the 50 likert rated statements (1-5;0 if missed; 1 was labeled as “strongly disagree”, 2 was labeled as “disagree”, 3 was labeled as “neither agree not disagree”, 4 was labeled as “agree” and 5 was labeled as “strongly agree”.) The original files can be obtaned at http://openpsychometrics.org/_rawdata/BIG5.zip”
BigData <- as.data.frame(read_excel("~/GitHub/LatentBiomarkers/Data/BigData.xlsx"))
BigData[BigData==0] <- NA
BigData <- BigData[complete.cases(BigData),]
BigData <- BigData[BigData$age<100,]
BigData <- BigData[,-c(1,3,5,6,7)]
BigData$gender <- 1*(BigData$gender==1)
studyName <- "Personality"
dataframe <- BigData
outcome <- "gender"
TopVariables <- 10
thro <- 0.20
cexheat = 0.25
Some libraries
library(psych)
library(whitening)
library("vioplot")
library("rpart")
pander::pander(c(rows=nrow(dataframe),col=ncol(dataframe)-1))
| rows | col |
|---|---|
| 19303 | 51 |
pander::pander(table(dataframe[,outcome]))
| 0 | 1 |
|---|---|
| 11831 | 7472 |
varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]
largeSet <- length(varlist) > 1500
Scaling and removing near zero variance columns and highly co-linear(r>0.99999) columns
### Some global cleaning
sdiszero <- apply(dataframe,2,sd) > 1.0e-16
dataframe <- dataframe[,sdiszero]
varlist <- colnames(dataframe)[colnames(dataframe) != outcome]
tokeep <- c(as.character(correlated_Remove(dataframe,varlist,thr=0.99999)),outcome)
dataframe <- dataframe[,tokeep]
varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]
iscontinous <- sapply(apply(dataframe,2,unique),length) >= 5 ## Only variables with enough samples
dataframeScaled <- FRESAScale(dataframe,method="OrderLogit")$scaledData
numsub <- nrow(dataframe)
if (numsub > 1000) numsub <- 1000
if (!largeSet)
{
hm <- heatMaps(data=dataframeScaled[1:numsub,],
Outcome=outcome,
Scale=TRUE,
hCluster = "row",
xlab="Feature",
ylab="Sample",
srtCol=45,
srtRow=45,
cexCol=cexheat,
cexRow=cexheat
)
par(op)
}
The heat map of the data
if (!largeSet)
{
par(cex=0.6,cex.main=0.85,cex.axis=0.7)
#cormat <- Rfast::cora(as.matrix(dataframe[,varlist]),large=TRUE)
cormat <- cor(dataframe[,varlist],method="pearson")
cormat[is.na(cormat)] <- 0
gplots::heatmap.2(abs(cormat),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Original Correlation",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Pearson Correlation|",
xlab="Feature", ylab="Feature")
diag(cormat) <- 0
print(max(abs(cormat)))
}
[1]
0.7683127
DEdataframe <- IDeA(dataframe,verbose=TRUE,thr=thro)
#>
#> Included: 51 , Uni p: 0.007904174 , Uncorrelated Base: 2 , Outcome-Driven Size: 0 , Base Size: 2
#>
#>
1 <R=0.768,r=0.484,N= 33>, Top: 11( 4 )[ 1 : 11 Fa= 11 : 0.535 ]( 11 , 17 , 0 ),<|>Tot Used: 28 , Added: 17 , Zero Std: 0 , Max Cor: 0.571
#>
2 <R=0.571,r=0.385,N= 33>, Top: 7( 4 )[ 1 : 7 Fa= 14 : 0.385 ]( 7 , 12 , 11 ),<|>Tot Used: 36 , Added: 12 , Zero Std: 0 , Max Cor: 0.468
#>
3 <R=0.468,r=0.334,N= 33>, Top: 8( 5 )[ 1 : 8 Fa= 15 : 0.334 ]( 7 , 12 , 14 ),<|>Tot Used: 43 , Added: 12 , Zero Std: 0 , Max Cor: 0.416
#>
4 <R=0.416,r=0.308,N= 33>, Top: 9( 1 )[ 1 : 9 Fa= 16 : 0.308 ]( 8 , 13 , 15 ),<|>Tot Used: 48 , Added: 13 , Zero Std: 0 , Max Cor: 0.400
#>
5 <R=0.400,r=0.300,N= 33>, Top: 5( 2 )[ 1 : 5 Fa= 19 : 0.300 ]( 4 , 5 , 16 ),<|>Tot Used: 48 , Added: 5 , Zero Std: 0 , Max Cor: 0.341
#>
6 <R=0.341,r=0.270,N= 33>, Top: 7( 1 )[ 1 : 7 Fa= 20 : 0.270 ]( 6 , 6 , 19 ),<|>Tot Used: 48 , Added: 6 , Zero Std: 0 , Max Cor: 0.264
#>
7 <R=0.264,r=0.232,N= 33>, Top: 9( 3 )[ 1 : 9 Fa= 21 : 0.232 ]( 7 , 10 , 20 ),<|>Tot Used: 49 , Added: 10 , Zero Std: 0 , Max Cor: 0.266
#>
8 <R=0.266,r=0.233,N= 33>, Top: 1( 1 )[ 1 : 1 Fa= 21 : 0.233 ]( 1 , 1 , 21 ),<|>Tot Used: 49 , Added: 1 , Zero Std: 0 , Max Cor: 0.231
#>
9 <R=0.231,r=0.215,N= 16>, Top: 6( 1 )[ 1 : 6 Fa= 22 : 0.215 ]( 6 , 9 , 21 ),<|>Tot Used: 49 , Added: 9 , Zero Std: 0 , Max Cor: 0.227
#>
10 <R=0.227,r=0.214,N= 16>, Top: 3( 2 )[ 1 : 3 Fa= 23 : 0.214 ]( 3 , 4 , 22 ),<|>Tot Used: 49 , Added: 4 , Zero Std: 0 , Max Cor: 0.212
#>
11 <R=0.212,r=0.206,N= 16>, Top: 6( 2 )[ 1 : 6 Fa= 26 : 0.206 ]( 6 , 7 , 23 ),<|>Tot Used: 50 , Added: 7 , Zero Std: 0 , Max Cor: 0.203
#>
12 <R=0.203,r=0.202,N= 16>, Top: 1( 1 )[ 1 : 1 Fa= 26 : 0.202 ]( 1 , 1 , 26 ),<|>Tot Used: 50 , Added: 1 , Zero Std: 0 , Max Cor: 0.200
#>
13 <R=0.200,r=0.200,N= 2>, Top: 1( 1 )[ 1 : 1 Fa= 26 : 0.200 ]( 1 , 1 , 26 ),<|>Tot Used: 50 , Added: 1 , Zero Std: 0 , Max Cor: 0.200
#>
14 <R=0.200,r=0.200,N= 0>
#>
[ 14 ], 0.1997993 Decor Dimension: 50 Nused: 50 . Cor to Base: 22 , ABase: 2 , Outcome Base: 0
#>
varlistc <- colnames(DEdataframe)[colnames(DEdataframe) != outcome]
pander::pander(sum(apply(dataframe[,varlist],2,var)))
204
pander::pander(sum(apply(DEdataframe[,varlistc],2,var)))
185
pander::pander(entropy(discretize(unlist(dataframe[,varlist]), 256)))
1.73
pander::pander(entropy(discretize(unlist(DEdataframe[,varlistc]), 256)))
2.93
if (!largeSet)
{
par(cex=0.6,cex.main=0.85,cex.axis=0.7)
UPSTM <- attr(DEdataframe,"UPSTM")
gplots::heatmap.2(1.0*(abs(UPSTM)>0),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Decorrelation matrix",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Beta|>0",
xlab="Output Feature", ylab="Input Feature")
par(op)
}
if (!largeSet)
{
cormat <- cor(DEdataframe[,varlistc],method="pearson")
cormat[is.na(cormat)] <- 0
gplots::heatmap.2(abs(cormat),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Correlation after IDeA",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Pearson Correlation|",
xlab="Feature", ylab="Feature")
par(op)
diag(cormat) <- 0
print(max(abs(cormat)))
}
[1]
0.1997993
if (nrow(dataframe) < 1000)
{
classes <- unique(dataframe[1:numsub,outcome])
raincolors <- rainbow(length(classes))
names(raincolors) <- classes
datasetframe.umap = umap(scale(dataframe[1:numsub,varlist]),n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: Original",t='n')
text(datasetframe.umap$layout,labels=dataframe[1:numsub,outcome],col=raincolors[dataframe[1:numsub,outcome]+1])
}
if (nrow(dataframe) < 1000)
{
datasetframe.umap = umap(scale(DEdataframe[1:numsub,varlistc]),n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: After IDeA",t='n')
text(datasetframe.umap$layout,labels=DEdataframe[1:numsub,outcome],col=raincolors[DEdataframe[1:numsub,outcome]+1])
}
univarRAW <- uniRankVar(varlist,
paste(outcome,"~1"),
outcome,
dataframe,
rankingTest="AUC")
univarDe <- uniRankVar(varlistc,
paste(outcome,"~1"),
outcome,
DEdataframe,
rankingTest="AUC",
)
univariate_columns <- c("caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC")
##topfive
topvar <- c(1:length(varlist)) <= TopVariables
pander::pander(univarRAW$orderframe[topvar,univariate_columns])
| caseMean | caseStd | controlMean | controlStd | controlKSP | ROCAUC | |
|---|---|---|---|---|---|---|
| N1 | 2.93 | 1.33 | 3.47 | 1.252 | 0 | 0.614 |
| A4 | 3.81 | 1.11 | 4.17 | 0.973 | 0 | 0.595 |
| A5 | 2.39 | 1.19 | 2.02 | 1.085 | 0 | 0.592 |
| A9 | 3.72 | 1.15 | 4.08 | 1.019 | 0 | 0.592 |
| N2 | 3.46 | 1.18 | 3.09 | 1.150 | 0 | 0.589 |
| N6 | 2.74 | 1.32 | 3.13 | 1.296 | 0 | 0.585 |
| A6 | 3.68 | 1.18 | 4.03 | 1.078 | 0 | 0.585 |
| N3 | 3.62 | 1.22 | 3.99 | 1.060 | 0 | 0.584 |
| A7 | 2.36 | 1.16 | 2.03 | 1.091 | 0 | 0.582 |
| A1 | 2.52 | 1.36 | 2.18 | 1.354 | 0 | 0.580 |
topLAvar <- univarDe$orderframe$Name[str_detect(univarDe$orderframe$Name,"La_")]
topLAvar <- unique(c(univarDe$orderframe$Name[topvar],topLAvar[1:as.integer(TopVariables/2)]))
finalTable <- univarDe$orderframe[topLAvar,univariate_columns]
theLaVar <- rownames(finalTable)[str_detect(rownames(finalTable),"La_")]
pander::pander(univarDe$orderframe[topLAvar,univariate_columns])
| caseMean | caseStd | controlMean | controlStd | controlKSP | ROCAUC | |
|---|---|---|---|---|---|---|
| La_N2 | 4.53 | 1.097 | 4.240 | 1.058 | 5.97e-14 | 0.582 |
| La_N8 | 1.65 | 1.304 | 2.015 | 1.303 | 0.00e+00 | 0.579 |
| La_E2 | 4.86 | 1.167 | 4.558 | 1.094 | 0.00e+00 | 0.578 |
| La_A7 | 2.90 | 1.036 | 2.640 | 0.980 | 0.00e+00 | 0.578 |
| La_A4 | 4.77 | 1.006 | 4.998 | 0.879 | 0.00e+00 | 0.572 |
| La_E9 | 1.05 | 1.172 | 0.785 | 1.202 | 0.00e+00 | 0.564 |
| La_C8 | 3.66 | 1.049 | 3.456 | 0.993 | 4.17e-05 | 0.556 |
| La_C9 | 2.27 | 1.116 | 2.474 | 1.082 | 0.00e+00 | 0.555 |
| La_N1 | 1.65 | 1.016 | 1.810 | 0.935 | 0.00e+00 | 0.553 |
| O10 | 4.12 | 0.946 | 3.936 | 0.999 | 0.00e+00 | 0.552 |
dc <- getLatentCoefficients(DEdataframe)
fscores <- attr(DEdataframe,"fscore")
theSigDc <- dc[theLaVar]
names(theSigDc) <- NULL
theSigDc <- unique(names(unlist(theSigDc)))
theFormulas <- dc[rownames(finalTable)]
deFromula <- character(length(theFormulas))
names(deFromula) <- rownames(finalTable)
pander::pander(c(mean=mean(sapply(dc,length)),total=length(dc),fraction=length(dc)/(ncol(dataframe)-1)))
| mean | total | fraction |
|---|---|---|
| 3.3 | 46 | 0.902 |
allSigvars <- names(dc)
dx <- names(deFromula)[1]
for (dx in names(deFromula))
{
coef <- theFormulas[[dx]]
cname <- names(theFormulas[[dx]])
names(cname) <- cname
for (cf in names(coef))
{
if (cf != dx)
{
if (coef[cf]>0)
{
deFromula[dx] <- paste(deFromula[dx],
sprintf("+ %5.3f*%s",coef[cf],cname[cf]))
}
else
{
deFromula[dx] <- paste(deFromula[dx],
sprintf("%5.3f*%s",coef[cf],cname[cf]))
}
}
}
}
finalTable <- rbind(finalTable,univarRAW$orderframe[theSigDc[!(theSigDc %in% rownames(finalTable))],univariate_columns])
orgnamez <- rownames(finalTable)
orgnamez <- str_remove_all(orgnamez,"La_")
finalTable$RAWAUC <- univarRAW$orderframe[orgnamez,"ROCAUC"]
finalTable$DecorFormula <- deFromula[rownames(finalTable)]
finalTable$fscores <- fscores[rownames(finalTable)]
Final_Columns <- c("DecorFormula","caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC","RAWAUC","fscores")
finalTable <- finalTable[order(-finalTable$ROCAUC),]
pander::pander(finalTable[,Final_Columns])
| DecorFormula | caseMean | caseStd | controlMean | controlStd | controlKSP | ROCAUC | RAWAUC | fscores | |
|---|---|---|---|---|---|---|---|---|---|
| N1 | NA | 2.93 | 1.328 | 3.471 | 1.252 | 0.00e+00 | 0.614 | 0.614 | NA |
| A4 | NA | 3.81 | 1.111 | 4.172 | 0.973 | 0.00e+00 | 0.595 | 0.595 | NA |
| N2 | NA | 3.46 | 1.184 | 3.094 | 1.150 | 0.00e+00 | 0.589 | 0.589 | NA |
| N6 | NA | 2.74 | 1.322 | 3.134 | 1.296 | 0.00e+00 | 0.585 | 0.585 | NA |
| N3 | NA | 3.62 | 1.221 | 3.986 | 1.060 | 0.00e+00 | 0.584 | 0.584 | NA |
| La_N2 | + 1.000N2 + 0.165N8 + 0.230*N10 | 4.53 | 1.097 | 4.240 | 1.058 | 5.97e-14 | 0.582 | 0.589 | 1 |
| A7 | NA | 2.36 | 1.164 | 2.034 | 1.091 | 0.00e+00 | 0.582 | 0.582 | NA |
| La_N8 | + 1.000N8 -0.259A3 -0.075A4 -0.031A7 | 1.65 | 1.304 | 2.015 | 1.303 | 0.00e+00 | 0.579 | 0.565 | 11 |
| La_E2 | + 0.289E1 + 1.000E2 + 0.338*E3 | 4.86 | 1.167 | 4.558 | 1.094 | 0.00e+00 | 0.578 | 0.569 | 3 |
| La_A7 | + 0.044E1 + 0.343E3 -0.196A3 -0.057A4 + 0.977*A7 | 2.90 | 1.036 | 2.640 | 0.980 | 0.00e+00 | 0.578 | 0.582 | 4 |
| La_A4 | + 1.000A4 + 0.406A7 | 4.77 | 1.006 | 4.998 | 0.879 | 0.00e+00 | 0.572 | 0.595 | 8 |
| E2 | NA | 2.96 | 1.328 | 2.635 | 1.288 | 0.00e+00 | 0.569 | 0.569 | NA |
| N8 | NA | 2.61 | 1.338 | 2.925 | 1.345 | 0.00e+00 | 0.565 | 0.565 | NA |
| La_E9 | -0.368E1 -0.314E3 + 1.000E9 -0.060N8 | 1.05 | 1.172 | 0.785 | 1.202 | 0.00e+00 | 0.564 | 0.545 | -1 |
| A3 | NA | 2.33 | 1.265 | 2.059 | 1.170 | 0.00e+00 | 0.560 | 0.560 | NA |
| La_C8 | + 0.019age -0.189N8 + 0.329C1 + 1.000C8 | 3.66 | 1.049 | 3.456 | 0.993 | 4.17e-05 | 0.556 | 0.534 | -3 |
| La_C9 | -0.411C1 + 0.184C6 + 1.000*C9 | 2.27 | 1.116 | 2.474 | 1.082 | 0.00e+00 | 0.555 | 0.549 | 0 |
| La_N1 | + 1.000N1 + 0.293N2 -0.349N3 -0.313N6 -0.066*N8 | 1.65 | 1.016 | 1.810 | 0.935 | 0.00e+00 | 0.553 | 0.614 | -4 |
| O10 | 4.12 | 0.946 | 3.936 | 0.999 | 0.00e+00 | 0.552 | 0.552 | 7 | |
| C9 | NA | 3.09 | 1.252 | 3.306 | 1.235 | 0.00e+00 | 0.549 | 0.549 | NA |
| E9 | NA | 3.23 | 1.371 | 3.007 | 1.406 | 0.00e+00 | 0.545 | 0.545 | NA |
| age | NA | 26.69 | 11.390 | 25.926 | 11.576 | 0.00e+00 | 0.535 | 0.535 | 1 |
| C8 | NA | 2.57 | 1.153 | 2.428 | 1.116 | 0.00e+00 | 0.534 | 0.534 | NA |
| N10 | NA | 2.76 | 1.303 | 2.883 | 1.317 | 0.00e+00 | 0.526 | 0.526 | NA |
| E3 | NA | 3.38 | 1.233 | 3.439 | 1.236 | 0.00e+00 | 0.513 | 0.513 | NA |
| C1 | NA | 3.30 | 1.106 | 3.330 | 1.093 | 0.00e+00 | 0.507 | 0.507 | 6 |
| E1 | NA | 2.62 | 1.244 | 2.635 | 1.225 | 0.00e+00 | 0.504 | 0.504 | 9 |
| C6 | NA | 2.92 | 1.382 | 2.926 | 1.408 | 0.00e+00 | 0.501 | 0.501 | NA |
featuresnames <- colnames(dataframe)[colnames(dataframe) != outcome]
pc <- prcomp(dataframe[,iscontinous],center = TRUE,scale. = TRUE) #principal components
predPCA <- predict(pc,dataframe[,iscontinous])
PCAdataframe <- as.data.frame(cbind(predPCA,dataframe[,!iscontinous]))
colnames(PCAdataframe) <- c(colnames(predPCA),colnames(dataframe)[!iscontinous])
#plot(PCAdataframe[,colnames(PCAdataframe)!=outcome],col=dataframe[,outcome],cex=0.65,cex.lab=0.5,cex.axis=0.75,cex.sub=0.5,cex.main=0.75)
#pander::pander(pc$rotation)
PCACor <- cor(PCAdataframe[,colnames(PCAdataframe) != outcome])
gplots::heatmap.2(abs(PCACor),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "PCA Correlation",
cexRow = 0.5,
cexCol = 0.5,
srtCol=45,
srtRow= -45,
key.title=NA,
key.xlab="Pearson Correlation",
xlab="Feature", ylab="Feature")
EFAdataframe <- dataframeScaled
if (length(iscontinous) < 2000)
{
topred <- min(length(iscontinous),nrow(dataframeScaled),ncol(predPCA)/2)
if (topred < 2) topred <- 2
uls <- fa(dataframeScaled[,iscontinous],nfactors=topred,rotate="varimax",warnings=FALSE) # EFA analysis
predEFA <- predict(uls,dataframeScaled[,iscontinous])
EFAdataframe <- as.data.frame(cbind(predEFA,dataframeScaled[,!iscontinous]))
colnames(EFAdataframe) <- c(colnames(predEFA),colnames(dataframeScaled)[!iscontinous])
EFACor <- cor(EFAdataframe[,colnames(EFAdataframe) != outcome])
gplots::heatmap.2(abs(EFACor),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "EFA Correlation",
cexRow = 0.5,
cexCol = 0.5,
srtCol=45,
srtRow= -45,
key.title=NA,
key.xlab="Pearson Correlation",
xlab="Feature", ylab="Feature")
}
par(op)
par(xpd = TRUE)
dataframe[,outcome] <- factor(dataframe[,outcome])
rawmodel <- rpart(paste(outcome,"~."),dataframe,control=rpart.control(maxdepth=3))
pr <- predict(rawmodel,dataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(rawmodel,main="Raw",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(rawmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,dataframe[,outcome]==0))
}
pander::pander(table(dataframe[,outcome],pr))
| 0 | 1 | |
|---|---|---|
| 0 | 10277 | 1554 |
| 1 | 5288 | 2184 |
pander::pander(ptab)
detail:
| statistic | est | lower | upper |
|---|---|---|---|
| ap | 0.194 | 0.188 | 0.199 |
| tp | 0.387 | 0.380 | 0.394 |
| se | 0.292 | 0.282 | 0.303 |
| sp | 0.869 | 0.862 | 0.875 |
| diag.ac | 0.646 | 0.639 | 0.652 |
| diag.or | 2.731 | 2.539 | 2.938 |
| nndx | 6.213 | 5.636 | 6.924 |
| youden | 0.161 | 0.144 | 0.177 |
| pv.pos | 0.584 | 0.568 | 0.600 |
| pv.neg | 0.660 | 0.653 | 0.668 |
| lr.pos | 2.225 | 2.099 | 2.359 |
| lr.neg | 0.815 | 0.802 | 0.828 |
| p.rout | 0.806 | 0.801 | 0.812 |
| p.rin | 0.194 | 0.188 | 0.199 |
| p.tpdn | 0.131 | 0.125 | 0.138 |
| p.tndp | 0.708 | 0.697 | 0.718 |
| p.dntp | 0.416 | 0.400 | 0.432 |
| p.dptn | 0.340 | 0.332 | 0.347 |
tab:
| Outcome + | Outcome - | Total | |
|---|---|---|---|
| Test + | 2184 | 1554 | 3738 |
| Test - | 5288 | 10277 | 15565 |
| Total | 7472 | 11831 | 19303 |
method: exact
digits: 2
conf.level: 0.95
pander::pander(ptab$detail[c(5,3,4,6),])
| statistic | est | lower | upper | |
|---|---|---|---|---|
| 5 | diag.ac | 0.646 | 0.639 | 0.652 |
| 3 | se | 0.292 | 0.282 | 0.303 |
| 4 | sp | 0.869 | 0.862 | 0.875 |
| 6 | diag.or | 2.731 | 2.539 | 2.938 |
par(op)
par(xpd = TRUE)
DEdataframe[,outcome] <- factor(DEdataframe[,outcome])
IDeAmodel <- rpart(paste(outcome,"~."),DEdataframe,control=rpart.control(maxdepth=3))
pr <- predict(IDeAmodel,DEdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(IDeAmodel,main="IDeA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(IDeAmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,DEdataframe[,outcome]==0))
}
pander::pander(table(DEdataframe[,outcome],pr))
| 0 | 1 | |
|---|---|---|
| 0 | 10923 | 908 |
| 1 | 5970 | 1502 |
pander::pander(ptab)
detail:
| statistic | est | lower | upper |
|---|---|---|---|
| ap | 0.1249 | 0.120 | 0.1296 |
| tp | 0.3871 | 0.380 | 0.3940 |
| se | 0.2010 | 0.192 | 0.2103 |
| sp | 0.9233 | 0.918 | 0.9280 |
| diag.ac | 0.6437 | 0.637 | 0.6504 |
| diag.or | 3.0266 | 2.771 | 3.3057 |
| nndx | 8.0470 | 7.232 | 9.0670 |
| youden | 0.1243 | 0.110 | 0.1383 |
| pv.pos | 0.6232 | 0.604 | 0.6426 |
| pv.neg | 0.6466 | 0.639 | 0.6538 |
| lr.pos | 2.6192 | 2.425 | 2.8292 |
| lr.neg | 0.8654 | 0.855 | 0.8763 |
| p.rout | 0.8751 | 0.870 | 0.8798 |
| p.rin | 0.1249 | 0.120 | 0.1296 |
| p.tpdn | 0.0767 | 0.072 | 0.0817 |
| p.tndp | 0.7990 | 0.790 | 0.8080 |
| p.dntp | 0.3768 | 0.357 | 0.3965 |
| p.dptn | 0.3534 | 0.346 | 0.3607 |
tab:
| Outcome + | Outcome - | Total | |
|---|---|---|---|
| Test + | 1502 | 908 | 2410 |
| Test - | 5970 | 10923 | 16893 |
| Total | 7472 | 11831 | 19303 |
method: exact
digits: 2
conf.level: 0.95
pander::pander(ptab$detail[c(5,3,4,6),])
| statistic | est | lower | upper | |
|---|---|---|---|---|
| 5 | diag.ac | 0.644 | 0.637 | 0.650 |
| 3 | se | 0.201 | 0.192 | 0.210 |
| 4 | sp | 0.923 | 0.918 | 0.928 |
| 6 | diag.or | 3.027 | 2.771 | 3.306 |
par(op)
par(xpd = TRUE)
PCAdataframe[,outcome] <- factor(PCAdataframe[,outcome])
PCAmodel <- rpart(paste(outcome,"~."),PCAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(PCAmodel,PCAdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(PCAmodel,main="PCA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(PCAmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,PCAdataframe[,outcome]==0))
}
pander::pander(table(PCAdataframe[,outcome],pr))
| 0 | 1 | |
|---|---|---|
| 0 | 10210 | 1621 |
| 1 | 5147 | 2325 |
pander::pander(ptab)
detail:
| statistic | est | lower | upper |
|---|---|---|---|
| ap | 0.204 | 0.199 | 0.210 |
| tp | 0.387 | 0.380 | 0.394 |
| se | 0.311 | 0.301 | 0.322 |
| sp | 0.863 | 0.857 | 0.869 |
| diag.ac | 0.649 | 0.643 | 0.656 |
| diag.or | 2.845 | 2.648 | 3.057 |
| nndx | 5.742 | 5.237 | 6.356 |
| youden | 0.174 | 0.157 | 0.191 |
| pv.pos | 0.589 | 0.574 | 0.605 |
| pv.neg | 0.665 | 0.657 | 0.672 |
| lr.pos | 2.271 | 2.146 | 2.403 |
| lr.neg | 0.798 | 0.785 | 0.812 |
| p.rout | 0.796 | 0.790 | 0.801 |
| p.rin | 0.204 | 0.199 | 0.210 |
| p.tpdn | 0.137 | 0.131 | 0.143 |
| p.tndp | 0.689 | 0.678 | 0.699 |
| p.dntp | 0.411 | 0.395 | 0.426 |
| p.dptn | 0.335 | 0.328 | 0.343 |
tab:
| Outcome + | Outcome - | Total | |
|---|---|---|---|
| Test + | 2325 | 1621 | 3946 |
| Test - | 5147 | 10210 | 15357 |
| Total | 7472 | 11831 | 19303 |
method: exact
digits: 2
conf.level: 0.95
pander::pander(ptab$detail[c(5,3,4,6),])
| statistic | est | lower | upper | |
|---|---|---|---|---|
| 5 | diag.ac | 0.649 | 0.643 | 0.656 |
| 3 | se | 0.311 | 0.301 | 0.322 |
| 4 | sp | 0.863 | 0.857 | 0.869 |
| 6 | diag.or | 2.845 | 2.648 | 3.057 |
par(op)
EFAdataframe[,outcome] <- factor(EFAdataframe[,outcome])
EFAmodel <- rpart(paste(outcome,"~."),EFAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(EFAmodel,EFAdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(EFAmodel,main="EFA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(EFAmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,EFAdataframe[,outcome]==0))
}
pander::pander(table(EFAdataframe[,outcome],pr))
| 0 | 1 | |
|---|---|---|
| 0 | 10198 | 1633 |
| 1 | 5151 | 2321 |
pander::pander(ptab)
detail:
| statistic | est | lower | upper |
|---|---|---|---|
| ap | 0.205 | 0.199 | 0.211 |
| tp | 0.387 | 0.380 | 0.394 |
| se | 0.311 | 0.300 | 0.321 |
| sp | 0.862 | 0.856 | 0.868 |
| diag.ac | 0.649 | 0.642 | 0.655 |
| diag.or | 2.814 | 2.619 | 3.023 |
| nndx | 5.794 | 5.280 | 6.420 |
| youden | 0.173 | 0.156 | 0.189 |
| pv.pos | 0.587 | 0.571 | 0.602 |
| pv.neg | 0.664 | 0.657 | 0.672 |
| lr.pos | 2.250 | 2.127 | 2.381 |
| lr.neg | 0.800 | 0.786 | 0.813 |
| p.rout | 0.795 | 0.789 | 0.801 |
| p.rin | 0.205 | 0.199 | 0.211 |
| p.tpdn | 0.138 | 0.132 | 0.144 |
| p.tndp | 0.689 | 0.679 | 0.700 |
| p.dntp | 0.413 | 0.398 | 0.429 |
| p.dptn | 0.336 | 0.328 | 0.343 |
tab:
| Outcome + | Outcome - | Total | |
|---|---|---|---|
| Test + | 2321 | 1633 | 3954 |
| Test - | 5151 | 10198 | 15349 |
| Total | 7472 | 11831 | 19303 |
method: exact
digits: 2
conf.level: 0.95
pander::pander(ptab$detail[c(5,3,4,6),])
| statistic | est | lower | upper | |
|---|---|---|---|---|
| 5 | diag.ac | 0.649 | 0.642 | 0.655 |
| 3 | se | 0.311 | 0.300 | 0.321 |
| 4 | sp | 0.862 | 0.856 | 0.868 |
| 6 | diag.or | 2.814 | 2.619 | 3.023 |
par(op)
theLaFormulas <- getLatentCoefficients(DEdataframe)
pander::pander(theLaFormulas)
La_E2:
| E1 | E2 | E3 |
|---|---|---|
| 0.289 | 1 | 0.338 |
La_E3:
| E1 | E3 | E7 | N8 | N10 |
|---|---|---|---|---|
| -0.555 | 0.857 | 0.212 | 0.0823 | 0.206 |
La_E4:
| E1 | E2 | E3 | E4 |
|---|---|---|---|
| 0.239 | -0.305 | 0.218 | 1 |
La_E5:
| E1 | E2 | E3 | E5 | E7 |
|---|---|---|---|---|
| 0.0619 | 0.214 | -0.278 | 1 | -0.389 |
La_E6:
| E1 | E2 | E3 | E6 | A3 | A4 | A7 | O10 |
|---|---|---|---|---|---|---|---|
| 0.168 | -0.398 | -0.0625 | 1 | -0.0411 | -0.012 | -0.163 | 0.208 |
La_E7:
| E1 | E2 | E3 | E7 |
|---|---|---|---|
| -0.305 | 0.201 | -0.605 | 1 |
La_E8:
| E8 | E9 |
|---|---|
| 1 | 0.468 |
La_E9:
| E1 | E3 | E9 | N8 |
|---|---|---|---|
| -0.368 | -0.314 | 1 | -0.0598 |
La_E10:
| E1 | E2 | E3 | E4 | E7 | E10 |
|---|---|---|---|---|---|
| 0.0377 | -0.137 | 0.063 | -0.228 | 0.293 | 1 |
La_N1:
| N1 | N2 | N3 | N6 | N8 |
|---|---|---|---|---|
| 1 | 0.293 | -0.349 | -0.313 | -0.0664 |
La_N2:
| N2 | N8 | N10 |
|---|---|---|
| 1 | 0.165 | 0.23 |
La_N3:
| N2 | N3 | N6 | N8 |
|---|---|---|---|
| 0.224 | 1 | -0.313 | -0.0695 |
La_N4:
| N4 | N8 | N10 |
|---|---|---|
| 1 | 0.0472 | 0.341 |
La_N5:
| N5 | N6 | N8 |
|---|---|---|
| 1 | -0.366 | -0.176 |
La_N6:
| N2 | N6 | N8 |
|---|---|---|
| 0.261 | 1 | -0.453 |
La_N7:
| N7 | N8 |
|---|---|
| 1 | -0.739 |
La_N8:
| N8 | A3 | A4 | A7 |
|---|---|---|---|
| 1 | -0.259 | -0.0754 | -0.0306 |
La_N9:
| N6 | N8 | N9 | A3 |
|---|---|---|---|
| -0.442 | -0.276 | 1 | -0.192 |
La_N10:
| N8 | N10 |
|---|---|
| -0.526 | 1 |
La_A1:
| A1 | A4 | A7 |
|---|---|---|
| 1 | 0.291 | -0.298 |
La_A2:
| A2 | A4 | A7 |
|---|---|---|
| 1 | -0.188 | 0.471 |
La_A3:
| A3 | A4 | A7 |
|---|---|---|
| 1 | 0.291 | 0.118 |
La_A4:
| A4 | A7 |
|---|---|
| 1 | 0.406 |
La_A5:
| A4 | A5 | A7 |
|---|---|---|
| 0.347 | 1 | -0.428 |
La_A6:
| A4 | A6 |
|---|---|
| -0.553 | 1 |
La_A7:
| E1 | E3 | A3 | A4 | A7 |
|---|---|---|---|---|
| 0.044 | 0.343 | -0.196 | -0.0572 | 0.977 |
La_A8:
| A4 | A7 | A8 |
|---|---|---|
| -0.351 | 0.212 | 1 |
La_A9:
| A4 | A6 | A9 |
|---|---|---|
| -0.567 | -0.196 | 1 |
La_A10:
| E1 | E3 | A4 | A7 | A10 |
|---|---|---|---|---|
| -0.138 | -0.273 | -0.224 | -0.0909 | 1 |
La_C2:
| C2 | C6 |
|---|---|
| 1 | -0.498 |
La_C3:
| C3 | C10 |
|---|---|
| 1 | -0.334 |
La_C4:
| N8 | C2 | C4 | C6 |
|---|---|---|---|
| -0.241 | -0.193 | 1 | -0.312 |
La_C5:
| C1 | C5 | C6 | C9 |
|---|---|---|---|
| -0.193 | 1 | 0.346 | -0.227 |
La_C6:
| C1 | C6 |
|---|---|
| 0.399 | 1 |
La_C7:
| C1 | C7 | C9 |
|---|---|---|
| -0.18 | 1 | -0.299 |
La_C8:
| age | N8 | C1 | C8 |
|---|---|---|---|
| 0.0187 | -0.189 | 0.329 | 1 |
La_C9:
| C1 | C6 | C9 |
|---|---|---|
| -0.411 | 0.184 | 1 |
La_C10:
| C1 | C10 |
|---|---|
| -0.304 | 1 |
La_O1:
| O1 | O10 |
|---|---|
| 1 | -0.341 |
La_O2:
| O1 | O2 | O10 |
|---|---|---|
| 0.333 | 1 | 0.248 |
La_O3:
| O3 | O10 |
|---|---|
| 1 | -0.442 |
La_O4:
| O2 | O4 |
|---|---|
| -0.523 | 1 |
La_O5:
| O5 | O10 |
|---|---|
| 1 | -0.595 |
La_O6:
| O3 | O6 | O10 |
|---|---|---|
| 0.558 | 1 | 0.223 |
La_O7:
| O2 | O5 | O7 | O10 |
|---|---|---|---|
| 0.298 | -0.239 | 1 | -0.0576 |
La_O8:
| O1 | O8 |
|---|---|
| -0.715 | 1 |